pacman::p_load(tidyverse, readr, psych, st, stars, tmap, sf,
ggstatsplot, plotly, ggplot2, ggdist, dplyr, ggiraph)Exploratory Data Analysis - Temperature
1 Import Packages
2 Load the prepared files
Let’s load the RDS files after data preparation.
temperature <- readRDS("data/temperature.rds")
describe(temperature) vars n mean sd median trimmed mad min max range skew
Station* 1 3715 10.00 5.37 10.0 10.12 7.41 1.0 18.0 17.0 -0.12
Region* 2 3715 3.33 1.41 3.0 3.41 1.48 1.0 5.0 4.0 -0.24
Year 3 3715 2010.64 10.07 2013.0 2011.83 8.90 1982.0 2023.0 41.0 -0.96
Month* 4 3715 6.53 3.45 7.0 6.54 4.45 1.0 12.0 11.0 -0.01
Date 5 3715 NaN NA NA NaN NA Inf -Inf -Inf NA
MeanTemp 6 3715 27.70 0.85 27.7 27.70 0.89 24.9 30.0 5.1 -0.05
MaxTemp 7 3715 33.85 0.98 33.9 33.86 0.89 30.4 37.9 7.5 -0.11
MinTemp 8 3715 22.62 0.94 22.6 22.63 0.74 0.0 26.2 26.2 -7.42
kurtosis se
Station* -1.39 0.09
Region* -1.30 0.02
Year -0.02 0.17
Month* -1.21 0.06
Date NA NA
MeanTemp -0.40 0.01
MaxTemp 0.28 0.02
MinTemp 178.72 0.02
3 Map of Singapore
mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019") %>%
st_transform(crs=3414)Reading layer `MPSZ-2019' from data source
`C:\Vanessa\SMU\Term 4 - Visual Analytics & Applications\mvheng\Group11_VAP\EDA\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
glimpse(mpsz)Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
Let’s take a look at the planning areas for the 5 regions.
tmap_mode("view")
tm_shape(mpsz) +
tm_polygons(col = "REGION_N", palette = "Set2")+
tm_layout(main.title = "Planning Area",
main.title.position = "left",
main.title.size = 1,
legend.show = FALSE,
frame = FALSE) +
tmap_options(check.and.fix = TRUE) +
tm_view(set.zoom.limits = c(11,12))4 Temperature analysis
4.1 Analyse temperature using maps
Let’s map the station to the planning area (PA).
Show the code
station_to_PA <- c(
"Admiralty" = "WOODLANDS",
"Ang Mo Kio" = "ANG MO KIO",
"Boon Lay (East)" = "BOON LAY",
"Changi" = "CHANGI",
"Choa Chu Kang (South)" = "CHOA CHU KANG",
"Clementi" = "CLEMENTI",
"East Coast Parkway" = "BEDOK",
"Jurong (West)" = "JURONG WEST",
"Khatib" = "YISHUN",
"Marina Barrage" = "DOWNTOWN CORE",
"Newton" = "NEWTON",
"Pasir Panjang" = "PASIR PANJANG",
"Paya Lebar" = "PAYA LEBAR",
"Seletar" = "SELETAR",
"Sembawang" = "SEMBAWANG",
"Tai Seng" = "HOUGANG",
"Tengah" = "TENGAH",
"Tuas South" = "TUAS"
)
temperature$PA <- station_to_PA[temperature$Station]
temperature <- temperature[, c("PA", setdiff(names(temperature), "PA"))]
head(temperature)# A tibble: 6 × 9
PA Station Region Year Month Date MeanTemp MaxTemp MinTemp
<chr> <chr> <chr> <dbl> <ord> <date> <dbl> <dbl> <dbl>
1 WOODLANDS Admiralty North 2009 Jan 2009-01-01 26.3 31.9 23.3
2 WOODLANDS Admiralty North 2009 Feb 2009-02-01 26.8 33.4 23
3 WOODLANDS Admiralty North 2009 Mar 2009-03-01 26.9 34.5 22.2
4 WOODLANDS Admiralty North 2009 Apr 2009-04-01 28.1 35.1 23.7
5 WOODLANDS Admiralty North 2009 May 2009-05-01 28.5 34.7 21.8
6 WOODLANDS Admiralty North 2009 Jun 2009-06-01 28.9 34.7 23.7
temp_map <- temperature %>%
group_by(PA, Station, Year) %>%
summarise(Annual_Mean_Temperature =
mean(MeanTemp, na.rm = TRUE),
Annual_Maximum_Temperature =
max(MaxTemp, na.rm = TRUE),
Annual_Minimum_Temperature =
min(MinTemp, na.rm = TRUE)) %>%
ungroup()
glimpse(temp_map)Rows: 323
Columns: 6
$ PA <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "…
$ Station <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "…
$ Year <dbl> 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2…
$ Annual_Mean_Temperature <dbl> 27.40000, 27.71667, 27.31667, 27.50833, 27.…
$ Annual_Maximum_Temperature <dbl> 34.5, 36.0, 35.4, 34.8, 35.6, 35.0, 34.9, 3…
$ Annual_Minimum_Temperature <dbl> 21.8, 21.7, 21.5, 21.8, 20.0, 21.8, 20.3, 2…
mpsztemp <- left_join(mpsz, temp_map,
by = c("PLN_AREA_N" = "PA"))
glimpse(mpsztemp)Rows: 2,357
Columns: 12
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTS…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MU…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE R…
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRA…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "…
$ Station <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Year <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Mean_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Maximum_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Minimum_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29...…
Let’s plot the annual mean temperature distribution across Singapore.
tm_shape(mpsztemp) +
tm_polygons(col = "Annual_Mean_Temperature",
palette = "Blues",
style = "jenks") +
tm_view(set.zoom.limits = c(11,12))
Note
It seems like the northern area of Singapore has a cooler mean temperature.
Let’s compare the maximum and minimum temperatures.
tm_shape(mpsztemp) +
tm_polygons(col = "Annual_Maximum_Temperature",
palette = "Blues",
style = "jenks") +
tm_view(set.zoom.limits = c(11,12))tm_shape(mpsztemp) +
tm_polygons(col = "Annual_Minimum_Temperature",
palette = "Blues",
style = "jenks") +
tm_view(set.zoom.limits = c(11,12))4.2 Temperature Time Series
4.2.1 Overall - Temperature Time Series
Show the code
gg <- ggplot(temperature, aes(x = Date, y = MeanTemp,
color = factor(Year))) +
geom_line(linewidth = 0.1) +
geom_point(aes(text = paste0("Month:", Month,
"<br>MeanTemp:", MeanTemp, "ºC"))) +
labs(x = "Year", y = "Monthly mean temperature (ºC)", color = "Year",
title = "Trend of Monthly Mean Temperature at Changi Station from 1981 to 2023",
subtitle = "Gentle trend line sloping upwards from 1981",
caption = "Data from Meteorological Service Singapore website") +
geom_smooth(method = "lm",
se = FALSE, color = "black") +
theme_minimal()
ggplotly(gg, tooltip = "text") %>%
layout(title = list(text =
paste0(gg$labels$title, "<br>", "<sup>",
gg$labels$subtitle, "</sup>"),
font = list(weight = "bold")),
showlegend = FALSE,
annotations = list(text = gg$labels$caption,
xref = "paper", yref = "paper",
x = 1000, y = 24,
xanchor = "right", yanchor = "top",
showarrow = FALSE)) 4.2.2 Temperature Time Series by station
Temp_station <- temperature %>%
group_by(Station, Year) %>%
summarise(Temp = mean(MeanTemp, na.rm = TRUE)) %>%
ungroup()
Temp_station$mean_tooltip <- c(paste0(
"Year: ", Temp_station$Year,
"\n Station: ", Temp_station$Station,
"\n Mean Temp: ", Temp_station$Temp, "°C"))
line <- ggplot(data = Temp_station,
aes(x = Year,
y = Temp,
group = Station,
color = Station,
data_id = Station)) +
geom_line_interactive(size = 1.2,
alpha = 0.4) +
geom_point_interactive(aes(tooltip = Temp_station$mean_tooltip),
fill = "white",
size = 1,
stroke = 1,
shape = 21) +
theme_classic() +
ylab("Annual Mean Temperature (°C)") +
xlab("Year") +
ggtitle("Annual Average of Mean Temperatures") +
theme(plot.title = element_text(size = 10),
plot.subtitle = element_text(size = 8))
girafe(ggobj = line,
width_svg = 8,
height_svg = 6 * 0.618,
options = list(
opts_hover(css = "stroke-width: 2.5; opacity: 1;"),
opts_hover_inv(css = "stroke-width: 1;opacity:0.6;")))4.3 COnfidence Interval of Mean Temperature
Show the code
Temp_yr_error <- temperature %>%
group_by(Year) %>%
summarise(n = n(), Temp = mean(MeanTemp, na.rm = TRUE),
sd = sd(MeanTemp, na.rm = TRUE)) %>%
mutate(se = sd/sqrt(n-1)) %>%
ungroup()
model <- lm(Temp ~ Year, Temp_yr_error)
y_intercept = coef(model)[1]
slope_coeff = coef(model)[2]
adjust_yintercept = slope_coeff * 1982 + y_intercept
gg <- ggplot(Temp_yr_error) +
geom_errorbar(aes(x = factor(Year), ymin = Temp - 2.58 * se,
ymax = Temp+2.58*se),
width=0.2, colour="black",
alpha=0.9, size=0.5) +
geom_point(aes(x = factor(Year), y = Temp,
text = paste0("Year:", `Year`,
"<br>Avg. Temp:", round(Temp, digits = 2),
"<br>95% CI:[",
round((Temp - 2.58 * se), digits = 2), ",",
round((Temp + 2.58 * se), digits = 2),"]")),
stat="identity", color="darkred",
size = 1.5, alpha = 1) +
geom_abline(slope = round(slope_coeff, 4),
intercept = adjust_yintercept,
untf = TRUE,
color = "blue",
linetype = "dashed")+
geom_text(aes(x = 11, y = 27.8, colour = "blue",
label = paste0("Temp=",
round(slope_coeff, 4), "* Year ",
round(y_intercept, 4)))) +
labs (x = "Year", y = "Annual mean temperatures (°C)",
title = "99% Confidence interval of annual mean temperatures by year",
subtitle = "From 1982 to 2023",
caption = "Data from Meteorological Service Singapore website") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1),
plot.title = element_text(face = "bold", size = 12))
ggplotly(gg, tooltip = "text") %>%
layout(title = list(text =
paste0(gg$labels$title, "<br>", "<sup>",
gg$labels$subtitle, "</sup>"),
font = list(weight = "bold")),
showlegend = FALSE)
Note
We can observe that the mean temperature over the years have increased and the confidence intervals have narrowed.
4.4 Temperature across the months
4.4.1 Box plot across the months
Show the code
gg <- ggplot(temperature,
aes(x = factor(Month, levels = month.abb), y = MeanTemp)) +
geom_violin(color = "navy", fill = "lightblue") +
geom_hline(data = temperature,
aes(yintercept = mean(MeanTemp, na.rm = TRUE)),
linetype = "dashed", size = 1, colour = "brown") +
geom_text(aes(x = 4.5, y = 27.3,
label = paste0("Mean : ",
round(mean(MeanTemp,na.rm = TRUE),2), "°C")),
colour = "brown") +
stat_summary(fun = mean, geom = "point",
shape = 20, size = 3, color = "orange",
aes(text = paste0("Mean : ", round(after_stat(y), 2), "°C"))) +
theme_minimal() +
labs(title = "Daily mean temperature across each month from 1981 to 2023",
subtitle = "November to February are cooler as compared to the rest of the year",
y = "Daily mean Temperatures (°C)",
x = "Month",
caption = "Data from Meteorological Service Singapore website")
ggplotly(gg, tooltip = "text") %>%
layout(title = list(text =
paste0(gg$labels$title, "<br>", "<sup>",
gg$labels$subtitle, "</sup>"),
font = list(weight = "bold")))
Note
We can observe that temperature in November to February are below the average temperature.
4.4.2 Heatmap across the months
Show the code
Temp <- temperature %>%
group_by(Year, Month) %>%
summarise(MTemp = mean(MeanTemp, na.rm = TRUE))
gg <- ggplot(Temp, aes(factor(Month, levels = month.abb), factor(Year),
fill = MTemp)) +
geom_tile(color = "white",
aes(text = paste0(Year, "-", Month,
"<br>Temp:", round(MTemp, 2), "°C"))) +
theme_minimal() +
scale_fill_gradient(name = "Temperature",
low = "sky blue",
high = "dark blue") +
labs(x = NULL, y = NULL,
title = "Mean temperatures by year and month",
subtitle = "Hotter in more months of 2023 as compared to the other years")
ggplotly(gg, tooltip = "text")
Note
We can observe that temperature in May and June are consistently high across the years.